library(astsa)
library(xts)
library(forecast)
Any metric that is measured over regular time intervals makes a time series. Examples: weather data, stock prices, industry forecasts, etc.
How to create a Time Series in R? - import data into R
# Demo: the same 100 random draws displayed as monthly, quarterly and yearly
# series by changing only the ts() frequency attribute.
par(mfrow = c(3, 1))
inputdata = rnorm(100, 0, 1)
for (spec in list(list(12, 'Monthly'), list(4, 'Quarterly'), list(1, 'Yearly'))) {
  plot(ts(inputdata, frequency = spec[[1]], start = 1959), ylab = spec[[2]])
}
# Example 1: yearly average global temperature deviations (1880-2009) in degrees centigrade
#require(astsa)
# Yearly global temperature deviations (astsa::globtemp), plotted with points+lines
par(mar=c(4,4,2,0.5)) #bottom, left, top, and right plot margins (whitespace around the plot region)
plot(globtemp, type='o',ylab = 'Global Temperature Deviation',col='blue')
# The apparent upward trend in the series during the latter part of the twentieth century has been used as an argument for the global warming hypothesis.
# There was a leveling off at about 1935 and then another sharp upward trend at about 1970.
# The question of interest is whether the overall trend is natural or whether it is caused by some human-induced interference.
# Decomposition of time series
# Decompose the first EuStockMarkets series (DAX) into trend/seasonal/remainder
tsData = EuStockMarkets[,1]
tsmulti = decompose(tsData,type='multiplicative') # multiplicative decomposition
tsaddti = decompose(tsData,type='additive') # additive decomposition
plot(tsmulti)
plot(tsaddti)
# stl: seasonal-trend decomposition via loess; 'periodic' fixes the seasonal pattern
stlRes = stl(tsData,s.window = 'periodic')
#stlRes
## Call:
## stl(x = tsData, s.window = "periodic")
##
## Components
## Time Series:
## Start = c(1991, 130)
## End = c(1998, 169)
## Frequency = 260
## seasonal trend remainder
## 1991.496 43.1900952 1602.604 -17.0445950
## 1991.500 55.3795008 1603.064 -44.8134914
## 1991.504 61.2914064 1603.523 -58.3048878
## 1991.508 68.4470620 1603.983 -51.3900342
## 1991.512 68.4527176 1604.442 -54.7351806
## 1991.515 70.8396232 1604.902 -65.1315770
# Johnson & Johnson quarterly earnings (astsa): STL decomposition and Holt-Winters fit
jj = astsa::jj
# robust is a logical flag; the original passed the string 'TRUE', which only
# worked by accidental coercion in if() -- use the logical TRUE.
dejj = stl(jj, s.window = "periodic", robust = TRUE)
plot(jj)
plot(HoltWinters(jj)) #Computes Holt-Winters Filtering of a given time series. Unknown parameters are determined by minimizing the squared prediction error.
#need packages
library('pander')
library('gridExtra')
library('devtools')
library('zoo')
library('latticeExtra')
library('lattice')
library('TSA')
Example of Time Series
# JJ Quarterly Earnings per share (84 quarters in 21 years from 1960-1980)
plot(jj,type='o', xlab = 'Time',ylab='Earnings Per Share')
# Note the gradually increasing underlying trend, with a fairly regular
# variation superimposed on it that repeats over the quarters.
# Financial time series: DJIA daily returns
djiar = diff(log(djia$Close))[-1] # log-differences approximate returns; drop the first observation
# approximate returns
plot(djiar, main="DJIA Returns")
A time series is given as a sequence of random variables, \(x_1, x_2, x_3, \dots\)
# Random walks with and without drift on the same axes.
set.seed(154)                 # reproducible draws
w  <- rnorm(200)              # white-noise increments
x  <- cumsum(w)               # random walk, no drift
wd <- w + .2                  # increments with drift 0.2
xd <- cumsum(wd)              # random walk with drift
plot.ts(xd, ylim = c(-5, 55), main = "random walk", ylab = '')
lines(x, col = 4)
abline(h = 0, col = 4, lty = 2)       # zero line for the drift-free walk
abline(a = 0, b = .2, lty = 2)        # mean function of the drifted walk
legend('topleft', c("drift=0.2", "drift=0"), col = c('black', 'blue'), lty = 1)
# set.seed(303)
# xx = rnorm(101,sd=1.5)
# ww = rnorm(101,sd=1.5)
# for(t in 2:101) { xx[t] = xx[t-1] + ww[t] +0.2}
# plot.ts(xx[2:101],main='random walk')
# White noise and its 3-point centered moving average, stacked for comparison.
w <- rnorm(500, 0, 1)                              # 500 N(0,1) variates
# sides = 2: centered window (past + future); sides = 1 would use past values only
v <- filter(w, sides = 2, filter = rep(1/3, 3))
par(mfrow = c(2, 1))
plot.ts(w, main = "white noise")
plot.ts(v, ylim = c(-3, 3), main = "moving average")
#Example 1.10 p11
# Autoregression x_t = x_{t-1} - 0.9 x_{t-2} + w_t via recursive filtering.
w <- rnorm(550, 0, 1)   # 50 extra burn-in values to wash out startup effects
x <- filter(w, filter = c(1, -.9), method = "recursive")[-(1:50)]  # drop burn-in
plot.ts(x, main = "autoregression")
# Cosine signal (amplitude 2, period 50, phase .6*pi) alone and buried in
# noise at two different noise levels.
cs <- 2 * cos(2*pi*1:500/50 + .6*pi)
w  <- rnorm(500, 0, 1)
par(mfrow = c(3, 1), mar = c(3, 2, 2, 1), cex.main = 1.5)
plot.ts(cs,         main = expression(2*cos(2*pi*t/50+.6*pi)))
plot.ts(cs + w,     main = expression(2*cos(2*pi*t/50+.6*pi) + N(0,1)))
plot.ts(cs + 5 * w, main = expression(2*cos(2*pi*t/50+.6*pi) + N(0,25)))
covariance * \(cov(X+Y,Z)=cov(X,Z)+cov(Y,Z)\) * \(cov(\alpha X,Y)=\alpha cov(X,Y)\) * if \(X, Y\) are independent, \(cov(X,Y)=0\)
# Stationary series: constant mean 5 plus N(0,1) noise
# (note: the noise below has standard deviation 1, not 1.5)
eps <- rnorm(400, mean = 0, sd = 1)   # white-noise component
mu  <- 5                              # the constant mean
X_t <- mu + eps                       # observed process
ts.plot(X_t, main = "Example of (random) stationary time series", ylab = expression(X[t]))
# Fit a straight-line time trend to the chicken price series (astsa::chicken);
# na.action=NULL keeps the ts attributes intact for plotting.
summary(fit <- lm(chicken~time(chicken), na.action=NULL))
##
## Call:
## lm(formula = chicken ~ time(chicken), na.action = NULL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.7411 -3.4730 0.8251 2.7738 11.5804
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.131e+03 1.624e+02 -43.91 <2e-16 ***
## time(chicken) 3.592e+00 8.084e-02 44.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.696 on 178 degrees of freedom
## Multiple R-squared: 0.9173, Adjusted R-squared: 0.9168
## F-statistic: 1974 on 1 and 178 DF, p-value: < 2.2e-16
plot(chicken, ylab="cents per pound",col='blue')
abline(fit) # add the fitted trend line
##### Using Regression to Discover a Signal in Noise
set.seed(90210) # so you can reproduce these results
# Cosine signal (amplitude 2, period 50, phase .6*pi) buried in N(0,25) noise
x = 2*cos(2*pi*1:500/50 + .6*pi) + rnorm(500,0,5)
z1 = cos(2*pi*1:500/50)  # cosine regressor at the known frequency
z2 = sin(2*pi*1:500/50)  # sine regressor at the known frequency
summary(fit <- lm(x~0+z1+z2)) # zero to exclude the intercept
##
## Call:
## lm(formula = x ~ 0 + z1 + z2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.8584 -3.8525 -0.3186 3.3487 15.5440
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z1 -0.7442 0.3274 -2.273 0.0235 *
## z2 -1.9949 0.3274 -6.093 2.23e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.177 on 498 degrees of freedom
## Multiple R-squared: 0.07827, Adjusted R-squared: 0.07456
## F-statistic: 21.14 on 2 and 498 DF, p-value: 1.538e-09
par(mfrow=c(2,1))
plot.ts(x) # the noisy data alone
plot.ts(x, col=8, ylab=expression(hat(x))) # data (grey) with fitted signal overlaid
lines(fitted(fit), col=2)
set.seed(101)
# Simulate an AR(1) process X_t = alpha * X_{t-1} + Z_t and inspect ACF/PACF.
# The original declared alpha = 0.5 but hard-coded 0.7 in the recursion;
# alpha now holds the coefficient actually used, and the loop references it.
alpha = 0.7
# random innovations with mean 0 and standard deviation 1.5
Z <- rnorm(100, mean = 0, sd = 1.5)
# preallocate the path instead of growing it inside the loop
X <- numeric(length(Z))
X[1] <- rnorm(1)  # random starting value
# recursion: each value is alpha times the previous value plus fresh noise
for (i in 2:length(Z)) {
  X[i] <- alpha * X[i-1] + Z[i]
}
# plot the simulated path, then its ACF (tails off) and PACF (cuts off at lag 1)
ts.plot(X)
par(mfrow=c(2,1))
acf(X)
pacf(X)
# Compare simulated AR(1) sample paths: phi = +0.9 (smooth) vs phi = -0.9 (choppy)
par(mfrow=c(2,1))
plot(arima.sim(list(order=c(1,0,0), ar=.9), n=100), ylab="x",
main=(expression(AR(1)~~~phi==+.9)))
plot(arima.sim(list(order=c(1,0,0), ar=-.9), n=100), ylab="x",
main=(expression(AR(1)~~~phi==-.9)))
# Simulate an MA(1)-style process X_t = theta * Z_{t-1} + Z_t, inspect its
# ACF/PACF, then compare with arima.sim MA(1) paths for theta = +/-0.9.
set.seed(101)
theta <- 0.5                            # MA coefficient
Z <- rnorm(100, mean = 0, sd = 1.5)     # driving noise
X <- rnorm(1)                           # random starting value
for (i in seq_along(Z)[-1]) {
  X[i] <- theta * Z[i - 1] + Z[i]
}
ts.plot(X)
par(mfrow = c(2, 1))
acf(X)    # MA(1) ACF cuts off after lag 1
pacf(X)   # MA(1) PACF tails off
par(mfrow = c(2, 1))
plot(arima.sim(n = 100, model = list(MA = c(0.9)), sd = 1),
     main = (expression(MA(1)~~~theta==+.9)))
plot(arima.sim(n = 100, model = list(MA = c(-0.9)), sd = 1),
     main = (expression(MA(1)~~~theta==-.9)))
acf(arima.sim(n = 100, model = list(MA = c(0.9))), 100)
## Causality, Invertibility tsa4 P88
An introduction to ARIMA
p is the number of autoregressive terms, d is the number of nonseasonal differences needed for stationarity, and q is the number of lagged forecast errors in the prediction equation.
The patterns of ACF and PACF for stationary AR(P) and MA(q) processes are 1. The ACF of AR(p) eventually decays exponentially toward zero; the PACF of AR(p) is truncated (becoming zero) after the p-th lag. 2. The ACF of MA(q) is truncated (becoming zero) after the q-th lag; the PACF of MA(q) eventually decays exponentially toward zero. An extremely slow-decaying ACF is signal for nonstationarity, which can be formally verified by unit root tests
##tsdisplay: https://rstudio-pubs-static.s3.amazonaws.com/477861_d23ca8d9390d407aab43c0171f70b72e.html
Step 1: stationary
library(astsa)
library(forecast)
# Show the series with its ACF/PACF to gauge the differencing order
tsdisplay(globtemp) # d =1
tseries::adf.test(globtemp)
##
## Augmented Dickey-Fuller Test
##
## data: globtemp
## Dickey-Fuller = -1.6219, Lag order = 5, p-value = 0.7338
## alternative hypothesis: stationary
# ADF null hypothesis: the series is NOT stationary (it has a unit root).
# P-value is greater than the significance level, so we fail to reject the null hypothesis: the series is not stationary
# KPSS Test for Level Stationarity
tseries::kpss.test(globtemp)
## Warning in tseries::kpss.test(globtemp): p-value smaller than printed p-
## value
##
## KPSS Test for Level Stationarity
##
## data: globtemp
## KPSS Level = 2.3334, Truncation lag parameter = 4, p-value = 0.01
# KPSS null hypothesis (opposite of ADF): the data IS stationary.
# Small p-value: reject the null - the series is not stationary
Step 2: find d
# Choose the differencing order d for WWWusage by comparing the raw series,
# its first and second differences, and their ACFs.
df.ts <- WWWusage
df1 <- diff(df.ts)   # first-order difference
df2 <- diff(df1)     # second-order difference
par(mfcol = c(3, 2))
plot(df.ts)
plot(df1, main = "1st Order Differencing")
plot(df2, main = "2nd Order Differencing")
acf(df.ts)
ACFdf1 <- acf(df1, plot = FALSE)
plot(ACFdf1, main = "1st Order Differencing")
ACFdf2 <- acf(df2, plot = FALSE)
plot(ACFdf2, main = "2nd Order Differencing")
# d = 0,1
acf(df2, plot = TRUE, type = "covariance")
* For the above series, the time series reaches stationarity with two orders of differencing. * But on looking at the autocorrelation plot for the 2nd differencing the lag goes into the far negative zone fairly quick, which indicates, the series might have been over differenced. * So, I am going to tentatively fix the order of differencing as 1 even though the series is not perfectly stationary (weak stationarity).
Step 3: find p
pacf(df1)
# p = 0,1,2
Step 4: find q
acf(df1)
Step 5: q+d+p <= 6 * overall, d = 0,1 & p = 0,1,2 & q = 2,3 * Let’s fit ARIMA(1,1,1), ARIMA(1,1,2), and ARIMA(1,1,3)
ARIMA(1,1,1)
# Candidate model 1: ARIMA(1,1,1) with drift, via forecast::Arima
fit1 <- Arima(df.ts, order=c(1, 1, 1),include.drift = TRUE)
summary(fit1)
## Series: df.ts
## ARIMA(1,1,1) with drift
##
## Coefficients:
## ar1 ma1 drift
## 0.6344 0.5297 1.1205
## s.e. 0.0866 0.0893 1.2860
##
## sigma^2 estimated as 10.03: log likelihood=-253.79
## AIC=515.58 AICc=516 BIC=525.96
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04803303 3.10304 2.395923 0.073378 1.914083 0.5294563
## ACF1
## Training set -0.007904884
predCI = forecast(fit1) #also gives you C.I. (point forecasts plus prediction intervals)
autoplot(forecast(fit1))
ARIMA(1,1,2)
fit2 <- Arima(df.ts, order=c(1, 1, 2),include.drift = TRUE)
summary(fit2)
## Series: df.ts
## ARIMA(1,1,2) with drift
##
## Coefficients:
## ar1 ma1 ma2 drift
## 0.6353 0.5284 -0.0012 1.1206
## s.e. 0.2570 0.3550 0.3213 1.2905
##
## sigma^2 estimated as 10.14: log likelihood=-253.79
## AIC=517.58 AICc=518.22 BIC=530.55
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04794822 3.103043 2.395902 0.07346461 1.914038 0.5294516
## ACF1
## Training set -0.007698769
predCI = forecast(fit2) #also gives you C.I
autoplot(forecast(fit2))
ARIMA(1,1,3)
fit3 <- Arima(df.ts, order=c(1, 1, 3),method="ML",include.drift = TRUE)
summary(fit3)
## Series: df.ts
## ARIMA(1,1,3) with drift
##
## Coefficients:
## ar1 ma1 ma2 ma3 drift
## 0.8439 0.3517 -0.2766 -0.2468 1.0287
## s.e. 0.1009 0.1449 0.1516 0.1317 1.5726
##
## sigma^2 estimated as 9.883: log likelihood=-252.09
## AIC=516.18 AICc=517.1 BIC=531.75
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.06382787 3.047965 2.37836 0.1233786 1.904524 0.525575
## ACF1
## Training set -0.03441199
predCI = forecast(fit3) #also gives you C.I
autoplot(forecast(fit3))
Step 6: compare the AIC and BIC values.
data.frame('AIC'= c(AIC(fit1),AIC(fit2),AIC(fit3)),'BIC'=c(BIC(fit1),BIC(fit2),BIC(fit3)),row.names=c("fit1","fit2","fit3"))
## AIC BIC
## fit1 515.5793 525.9598
## fit2 517.5793 530.5549
## fit3 516.1822 531.7529
summary(auto.arima(df.ts))
## Series: df.ts
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.6504 0.5256
## s.e. 0.0842 0.0896
##
## sigma^2 estimated as 9.995: log likelihood=-254.15
## AIC=514.3 AICc=514.55 BIC=522.08
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3035616 3.113754 2.405275 0.2805566 1.917463 0.5315228
## ACF1
## Training set -0.01715517
sar_fit1 = sarima(df.ts,1,1,1)
## initial value 1.731200
## iter 2 value 1.297094
## iter 3 value 1.179777
## iter 4 value 1.161623
## iter 5 value 1.138500
## iter 6 value 1.136236
## iter 7 value 1.136037
## iter 8 value 1.135978
## iter 9 value 1.135976
## iter 10 value 1.135975
## iter 11 value 1.135975
## iter 11 value 1.135975
## iter 11 value 1.135975
## final value 1.135975
## converged
## initial value 1.145078
## iter 2 value 1.144888
## iter 3 value 1.144609
## iter 4 value 1.144598
## iter 5 value 1.144594
## iter 6 value 1.144593
## iter 7 value 1.144593
## iter 7 value 1.144593
## iter 7 value 1.144593
## final value 1.144593
## converged
sar_fit2 = sarima(df.ts,1,1,2)
## initial value 1.731200
## iter 2 value 1.352439
## iter 3 value 1.266267
## iter 4 value 1.146901
## iter 5 value 1.140256
## iter 6 value 1.135951
## iter 7 value 1.135610
## iter 8 value 1.135554
## iter 9 value 1.135461
## iter 10 value 1.135306
## iter 11 value 1.135268
## iter 12 value 1.135229
## iter 13 value 1.135186
## iter 14 value 1.135135
## iter 15 value 1.135066
## iter 16 value 1.135027
## iter 17 value 1.134977
## iter 18 value 1.134950
## iter 19 value 1.134942
## iter 20 value 1.134942
## iter 21 value 1.134942
## iter 22 value 1.134942
## iter 23 value 1.134942
## iter 23 value 1.134942
## iter 23 value 1.134942
## final value 1.134942
## converged
## initial value 1.146358
## iter 2 value 1.146089
## iter 3 value 1.145516
## iter 4 value 1.145425
## iter 5 value 1.145266
## iter 6 value 1.144892
## iter 7 value 1.144682
## iter 8 value 1.144602
## iter 9 value 1.144593
## iter 10 value 1.144593
## iter 10 value 1.144593
## final value 1.144593
## converged
# or tsdiag(sar_fit1)
library(forecast)
accuracy(fit1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04803303 3.10304 2.395923 0.073378 1.914083 0.5294563
## ACF1
## Training set -0.007904884
# Actual series in red, fitted values in blue
plot(fit1$x,col="red")
lines(fitted(fit1),col="blue")
# Legend colors now match the plot (they were swapped: actual is red, fitted is blue)
legend('topleft',c("actual","fitted"),col=c("red","blue"),lty=1)
General framework - equation - backshift operator
# Monthly anti-diabetic drug sales in Australia from 1992 to 2008.
library(fpp)
df.ts= a10
dff1=diff(df.ts) # ordinary (lag-1) differencing
sdiff=diff(dff1,12) #since monthly period=12
## Seasonal differencing
par(mfrow=c(2,1))
plot(df.ts,col="blue", main="usual differencing",ylim=c(-10,30))
lines(dff1, col="red")
plot(df.ts,col="blue", main="seasonal differencing",ylim=c(-5,30))
lines(sdiff, col="red")
Step 1. Find D & d
library(astsa)
par(mfrow=c(4,1))
tr.jj=diff(log(jj)) # log stabilizes the growing variance; diff removes the trend
sdiff=diff(tr.jj,4) # seasonal difference at lag 4, since the data are quarterly
plot(jj)
plot(log(jj))
plot(tr.jj,ylab="diff(log(jj))")
plot(sdiff)
acf(jj,100)
acf(tr.jj,100)
acf(sdiff,100)
# lag1 = seasonal 1
# D = 0,1,d = 0,1
Step 2. Find Q & q
acf(sdiff,20)
# Q: look for a cutoff at the seasonal lags (between seasonal lag 0 and lag 1)
# q: look for a cutoff at the ordinary (non-seasonal) lags
# Q = 0,1
# q = 0,1
# parsimony principle: Q+D+P+p+d+q < 6
Step 3. Find P & p
pacf(sdiff,20)
# P: look for a cutoff at the seasonal lags (between seasonal lag 0 and lag 1)
# p: look for a cutoff at the ordinary (non-seasonal) lags
# P = 0,1  (labels corrected: this step selects P and p, not Q and q)
# p = 0,1
# parsimony principle: Q+D+P+p+d+q < 6
Step 4. applied sarima function
library(forecast)
# by hand
sarima(log(jj), 0,1,1,1,1,0,4,no.constant=FALSE)
## initial value -2.237259
## iter 2 value -2.429075
## iter 3 value -2.446738
## iter 4 value -2.455821
## iter 5 value -2.459761
## iter 6 value -2.462511
## iter 7 value -2.462602
## iter 8 value -2.462749
## iter 9 value -2.462749
## iter 9 value -2.462749
## iter 9 value -2.462749
## final value -2.462749
## converged
## initial value -2.411490
## iter 2 value -2.412022
## iter 3 value -2.412060
## iter 4 value -2.412062
## iter 4 value -2.412062
## iter 4 value -2.412062
## final value -2.412062
## converged
## $fit
##
## Call:
## stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D,
## Q), period = S), include.mean = !no.constant, transform.pars = trans, fixed = fixed,
## optim.control = list(trace = trc, REPORT = 1, reltol = tol))
##
## Coefficients:
## ma1 sar1
## -0.6796 -0.3220
## s.e. 0.0969 0.1124
##
## sigma^2 estimated as 0.007913: log likelihood = 78.46, aic = -150.91
##
## $degrees_of_freedom
## [1] 77
##
## $ttable
## Estimate SE t.value p.value
## ma1 -0.6796 0.0969 -7.0104 0.0000
## sar1 -0.3220 0.1124 -2.8641 0.0054
##
## $AIC
## [1] -1.840408
##
## $AICc
## [1] -1.838555
##
## $BIC
## [1] -1.753721
sarima(log(jj), 2,0,0,1,1,0,4, no.constant=FALSE)
## initial value -2.405347
## iter 2 value -2.484501
## iter 3 value -2.497205
## iter 4 value -2.498334
## iter 5 value -2.498401
## iter 6 value -2.498403
## iter 7 value -2.498403
## iter 7 value -2.498403
## iter 7 value -2.498403
## final value -2.498403
## converged
## initial value -2.446934
## iter 2 value -2.449248
## iter 3 value -2.449834
## iter 4 value -2.449862
## iter 5 value -2.449866
## iter 6 value -2.449866
## iter 7 value -2.449866
## iter 7 value -2.449866
## iter 7 value -2.449866
## final value -2.449866
## converged
## $fit
##
## Call:
## stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D,
## Q), period = S), xreg = constant, transform.pars = trans, fixed = fixed,
## optim.control = list(trace = trc, REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ar2 sar1 constant
## 0.2686 0.2855 -0.2695 0.0382
## s.e. 0.1137 0.1214 0.1212 0.0042
##
## sigma^2 estimated as 0.007403: log likelihood = 82.47, aic = -154.95
##
## $degrees_of_freedom
## [1] 76
##
## $ttable
## Estimate SE t.value p.value
## ar1 0.2686 0.1137 2.3616 0.0208
## ar2 0.2855 0.1214 2.3516 0.0213
## sar1 -0.2695 0.1212 -2.2236 0.0291
## constant 0.0382 0.0042 8.9990 0.0000
##
## $AIC
## [1] -1.866849
##
## $AICc
## [1] -1.860671
##
## $BIC
## [1] -1.723353
Step 5. applied auto.arima function
# auto
sarima.fit=auto.arima(log(jj),seasonal = TRUE)
checkresiduals(auto.arima(log(jj),seasonal = TRUE))
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0)(1,1,0)[4] with drift
## Q* = 5.3331, df = 4, p-value = 0.2548
##
## Model df: 4. Total lags used: 8
Step 6. prediction seasonal arima
pred.fit = forecast(sarima.fit)
autoplot(pred.fit,20)
# The 95% prediction interval suggested that the real observation was highly likely to fall within the range of values between 2.745580 and 3.091612
# Pseudo-cyclic AR(2): the complex roots of 1 - 1.5B + 0.75B^2 imply a
# quasi-periodic path with about 12 points per cycle.
z <- c(1, -1.5, .75)        # coefficients of the AR polynomial
a <- polyroot(z)[1]         # first complex root: 1 + 0.57735i
arg <- Arg(a) / (2 * pi)    # root argument in cycles per point
1/arg # the pseudo period
## [1] 12
# Simulate the matching AR(2) and mark the 12-point cycle boundaries
set.seed(8675309)
ar2 <- arima.sim(list(order = c(2, 0, 0), ar = c(1.5, -.75)), n = 144)
plot(ar2, axes = FALSE, xlab = "Time")
axis(2)
axis(1, at = seq(0, 144, by = 12))
box()
abline(v = seq(0, 144, by = 12), lty = 2, col = 'grey')
# produc acf
ACF = ARMAacf(ar=c(1.5,-.75), ma=0, 50)
PACF = ARMAacf(ar=c(1.5,-.75), ma=0, 50,pacf=TRUE)
plot(ACF, type="h", xlab="lag")
plot(PACF, type="h", xlab="lag")
abline(h=0)
# compute covariance -- gamma
acf(ar2,plot = F,type="covariance")
##
## Autocovariances of series 'ar2', by lag
##
## 0 1 2 3 4 5 6 7 8 9
## 8.949 7.547 4.542 1.115 -1.900 -3.801 -4.324 -3.847 -2.720 -1.362
## 10 11 12 13 14 15 16 17 18 19
## -0.116 0.788 1.182 1.082 0.576 -0.136 -0.780 -1.106 -0.940 -0.272
## 20 21
## 0.594 1.253
# compute correlation -- rho
acf(ar2,plot = F,type="correlation")
##
## Autocorrelations of series 'ar2', by lag
##
## 1 2 3 4 5 6 7 8 9 10
## 0.843 0.508 0.125 -0.212 -0.425 -0.483 -0.430 -0.304 -0.152 -0.013
## 11 12 13 14 15 16 17 18 19 20
## 0.088 0.132 0.121 0.064 -0.015 -0.087 -0.124 -0.105 -0.030 0.066
## 21
## 0.140
# compute partial correlation pacf
acf(ar2,plot = F,type="partial")
##
## Partial autocorrelations of series 'ar2', by lag
##
## 1 2 3 4 5 6 7 8 9 10
## 0.843 -0.705 -0.073 -0.131 0.054 -0.001 -0.161 0.023 -0.054 0.019
## 11 12 13 14 15 16 17 18 19 20
## -0.035 -0.093 -0.020 -0.098 -0.029 -0.026 0.005 0.072 0.051 -0.030
## 21
## -0.067
pacf(ar2,plot = F)
##
## Partial autocorrelations of series 'ar2', by lag
##
## 1 2 3 4 5 6 7 8 9 10
## 0.843 -0.705 -0.073 -0.131 0.054 -0.001 -0.161 0.023 -0.054 0.019
## 11 12 13 14 15 16 17 18 19 20
## -0.035 -0.093 -0.020 -0.098 -0.029 -0.026 0.005 0.072 0.051 -0.030
## 21
## -0.067
\(y_t=Asin(2*\pi*f*t+\phi)\) https://ademos.people.uic.edu/Chapter23.html
# Build a sine wave, then add noise three ways (uniform, gaussian, jitter)
t = seq(0,4*pi,,100) # 100 time points over two full periods of sin
a = 3 # amplitude
b = 2 # width (frequency multiplier)
# noiseless plot of sine wave
plot(a*sin(b*t),type='l',ylab='')
# add noise
n = 100
c.unif=runif(n) # uniform error
c.norm=rnorm(n) # gaussian/normal error
amp = 2 # noise amplitude
par(mfrow=c(3,1))
plot(a*sin(b*t)+c.unif*amp,type='l',ylab='add uniform error')
plot(a*sin(b*t)+c.norm*amp,type='l',ylab='add gaussian/norma err ')
plot(jitter(a*sin(b*t),factor=1000),type='l',ylab='add noise with jitter function') # easier way to add noise in R is by using the jitter function.
# Build a more complex series: two noisy sine waves of different speeds, summed.
par(mfrow=c(3,1))
Sine1 = jitter(30*sin(2*t),factor=200) # slow, large-amplitude component
plot(t , Sine1,type = 'l')
Sine2 = jitter(20*sin(5*t+2),factor=200) # fast, smaller-amplitude component
plot(t , Sine2,type = 'l')
TwoSines<-Sine1+Sine2
plot(TwoSines,type = 'l')
# the fact that there is a slow cycle of oscillations (Sine1)
# the fact that there is a fast cycle of oscillations (Sine2)
LineGoingUp<-seq(0.5,50,0.5)
ExponentialLineGoingUp<-(3/5000000)*LineGoingUp^5
par(mfrow=c(2,1))
plot(ExponentialLineGoingUp, type="l") #This is the line that we'll add to our time series to make it go up exponentially
TwoSinesGoingUpExponentially<-TwoSines+ExponentialLineGoingUp
plot(t,TwoSinesGoingUpExponentially,type="l")
# the fact that there’s noise in the data.
# the fact that it goes up over time
# the fact that its growth rate INCREASES over time
par(mfrow=c(2,1))
MultiplicativeTwoSinesGoingUp<-(TwoSines+100)*LineGoingUp
plot(t,MultiplicativeTwoSinesGoingUp,type="l")
LogTransformedSine<-log(MultiplicativeTwoSinesGoingUp)
plot(t,LogTransformedSine,type="l")
#The seasonal component at the beginning of the series is smaller than the seasonal component later in the series. in plot 1
# log fix it
tseries::adf.test(TwoSinesGoingUpExponentially, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: TwoSinesGoingUpExponentially
## Dickey-Fuller = -0.7137, Lag order = 4, p-value = 0.9666
## alternative hypothesis: stationary
# It looks like our p value is above .05, meaning that our data is indeed NON-stationary. This confirms the results of our visual inspection.
Diff1TwoSinesGoingUpExponentially<-diff(TwoSinesGoingUpExponentially, differences=1)
Diff2TwoSinesGoingUpExponentially<-diff(TwoSinesGoingUpExponentially, differences=2)
plot(Diff2TwoSinesGoingUpExponentially, type="l")
par(mfrow=c(3,1))
plot(TwoSinesGoingUpExponentially, type="l")
plot(Diff1TwoSinesGoingUpExponentially, type="l")
plot(Diff2TwoSinesGoingUpExponentially, type="l")
tseries::adf.test(Diff1TwoSinesGoingUpExponentially, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: Diff1TwoSinesGoingUpExponentially
## Dickey-Fuller = -3.0002, Lag order = 4, p-value = 0.1625
## alternative hypothesis: stationary
tseries::adf.test(Diff2TwoSinesGoingUpExponentially, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: Diff2TwoSinesGoingUpExponentially
## Dickey-Fuller = -10.267, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# until 2 become stationarity
acf(Diff2TwoSinesGoingUpExponentially, lag.max=30)
# The little dotted blue line means that the autocorrelations exceed significance bounds.
# In our case, it looks like our time series data repeatedly exceeds these bounds at certain lag points. There’s a recurring pattern involved. not good!
pacf(Diff2TwoSinesGoingUpExponentially, lag.max=30)
# Again, our data seems to follow a pattern at regular lag intervals. This is a sign that our data involves some kind of seasonal component
# Periodogram of the twice-differenced series: find the dominant frequencies
PGram<-TSA::periodogram(Diff2TwoSinesGoingUpExponentially)
PGramAsDataFrame = data.frame(freq=PGram$freq, spec=PGram$spec)
# Sort by descending spectral power. The result is no longer named `order`,
# which shadowed base::order for the rest of the script.
sorted_pgram = PGramAsDataFrame[order(-PGramAsDataFrame$spec),]
top2 = head(sorted_pgram, 2)
top2 # display the 2 highest "power" frequencies
## freq spec
## 10 0.10 2846.8854
## 4 0.04 186.6686
TimePeriod<-1/top2[2,1] # 1/0.04: the slow component's period
TimePeriod
## [1] 25
TimePeriod2<-1/top2[1,1] # 1/0.10: the fast component's period
TimePeriod2
## [1] 10
# These values – 25 and 10 – correspond exactly to the original sine waves that we put together. If you recall these plots, the first (slow) sine wave completed a cycle once every 25 time points, and the second (fast) sine wave completed its cycle once every 10 time points.
#Decomposing data once we know the frequency
TwoSinesGoingUpExponentiallyFreq10 <- ts(Diff2TwoSinesGoingUpExponentially, frequency=10)
RDecomp<-decompose(TwoSinesGoingUpExponentiallyFreq10)
plot(RDecomp)
SineWavesDecomposed <- stl(TwoSinesGoingUpExponentiallyFreq10, s.window="periodic")
plot(SineWavesDecomposed)
# a higher-frequency sine wave and a lower-frequency sine wave. That’s why, after the higher-frequency component is pulled apart, the trend still looks sine-y.
SineWavesDecomposedSeasonalAdjusted <- seasadj(SineWavesDecomposed)
plot(SineWavesDecomposedSeasonalAdjusted)
TwoSinesGoingUpExponentiallyFreq25 <- ts(SineWavesDecomposedSeasonalAdjusted, frequency=25)
SineWavesDecomposed2 <- stl(TwoSinesGoingUpExponentiallyFreq25, s.window="periodic")
plot(SineWavesDecomposed2)
SineWavesDecomposedSeasonalAdjusted2 <- seasadj(SineWavesDecomposed2)
plot(SineWavesDecomposedSeasonalAdjusted2)
#Now all that’s left here is the noise, with the seasonal component from the sine waves removed.
#Now if we look at the autocorrelations and partial autocorrelations, we should see that our values don’t follow a pattern where they cross significance at recurring time lags as before:
acf(as.numeric(SineWavesDecomposedSeasonalAdjusted2), lag.max = 30) #Because we're using a time series object, we'd have to convert it to a numeric vector instead. Otherwise the lags aren't presented as they should be.
pacf(as.numeric(SineWavesDecomposedSeasonalAdjusted2), lag.max = 30)
# The maximum significant lag values of the correlogram gives you the possible q values for the ARMA model. For instance, if our maximum value is 3, then an ARMA(0,3) model is possible.
# The maximum significant lag values of the partial correlogram gives you the p value for an ARMA model. For instance, if our maximum value is 3, then an an ARMA(3,0) model would also be possible.
library(forecast)
elecequip=fpp::elecequip
# a. Decompose the model to trend, seasonality, and stochastic parts.
eeadj2 <- stl(elecequip, s.window="periodic")
#head(eeadj2)
plot(eeadj2)
# b. Adjust for seasonality.
eeadj <- seasadj(stl(elecequip, s.window="periodic"))
head(elecequip)
## Jan Feb Mar Apr May Jun
## 1996 79.43 75.86 86.40 72.67 74.93 83.88
head(eeadj)
## Jan Feb Mar Apr May Jun
## 1996 84.99327 81.94829 78.42206 79.01742 79.74840 76.12854
par(mfrow=c(2,1))
plot(elecequip,main="original")
plot(eeadj,main="after seasonal adj")
1. The time plot shows some sudden changes, particularly the big drop in 2008/2009. These changes are due to the global economic environment. Otherwise there is nothing unusual about the time plot and there appears to be no need to do any data adjustments. 2. There is no evidence of changing variance, so we will not do a Box-Cox transformation. 3. The data are clearly non-stationary as the series wanders up and down for long periods. Consequently, we will take a first difference of the data. The differenced data are shown in Figure 8.12. These look stationary, and so we will not consider further differences.
# c. Find candidate models and find the model with the smallest AIC value. Plot the residuals of the model and comment on your findings.
tsdisplay(diff(eeadj),main="") # d =1
4. The PACF shown in plots is suggestive of an AR(3) model. So an initial candidate model is an ARIMA(3,1,0). There are no other obvious candidate models. 5. We fit an ARIMA(3,1,0) model along with variations including ARIMA(4,1,0), ARIMA(2,1,0), ARIMA(3,1,1), etc. Of these, the ARIMA(3,1,1) has a slightly smaller AICc value.
fit.eeadj <- Arima(eeadj, order=c(3,1,1))
summary(fit.eeadj )
## Series: eeadj
## ARIMA(3,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ma1
## 0.0519 0.1191 0.3730 -0.4542
## s.e. 0.1840 0.0888 0.0679 0.1993
##
## sigma^2 estimated as 9.737: log likelihood=-484.08
## AIC=978.17 AICc=978.49 BIC=994.4
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.001227744 3.079373 2.389267 -0.04290849 2.517748 0.2913919
## ACF1
## Training set 0.008928479
acf(residuals(fit.eeadj ))
Box.test(residuals(fit.eeadj), lag=24, fitdf=4, type="Ljung")
##
## Box-Ljung test
##
## data: residuals(fit.eeadj)
## X-squared = 20.496, df = 20, p-value = 0.4273
# High p-value suggests residuals are white noise
# null white noise (model is just fine)
pred = predict(fit.eeadj, n.ahead = 36)
plot(elecequip,type='l',xlim=c(2004,2014),ylim=c(60,140))
lines((pred$pred),col='blue')
lines((pred$pred+2*pred$se),col='orange') #如果log了则10^(pred$pred+2*pred$se),
lines((pred$pred-2*pred$se),col='orange')
# Cosine signal: amplitude 2, one cycle per 50 points, phase shift 0.6*pi
cs = 2*cos(2*pi*1:500/50 + .6*pi)
# Renamed from `cos`: that name shadowed base::cos for the rest of the script.
cos_ref = cos(2*pi*1:500/50)   # unshifted reference cosine
plot.ts(cs, main=expression(2*cos(2*pi*t/50+.6*pi)),lwd = 5,col='darkgrey')
abline(v=c(seq(50, 500, by=50)),col="red",lwd = 2)   # cycle boundaries every 50 points
lines(cos_ref,col="blue",lwd = 2)
grid(col = "grey")
# A = 2 (amplitude)
# w = 1/50 (one cycle every 50 time points) - red lines
# phi = 0.6*pi (phase shift relative to the blue reference curve)
cs=ts(cs)
acf(cs,lag.max = 500)  # the ACF of a periodic signal is itself periodic
x1 = 2*cos(2*pi*1:100*6/100) + 3*sin(2*pi*1:100*6/100)
x2 = 4*cos(2*pi*1:100*10/100) + 5*sin(2*pi*1:100*10/100)
x3 = 6*cos(2*pi*1:100*40/100) + 7*sin(2*pi*1:100*40/100)
x = x1 + x2 + x3
par(mfrow=c(2,2))
plot.ts(x1, ylim=c(-10,10), main=expression(omega==6/100~~~A^2==13))
plot.ts(x2, ylim=c(-10,10), main=expression(omega==10/100~~~A^2==41))
plot.ts(x3, ylim=c(-10,10), main=expression(omega==40/100~~~A^2==85))
plot.ts(x, ylim=c(-16,16), main="sum")
# Sum of three sinusoids; for a*cos + b*sin, the squared amplitude is A^2 = a^2 + b^2
x1 = 2*cos(2*pi*1:100*6/100) + 3*sin(2*pi*1:100*6/100)
# A^2 = 2^2 + 3^2 = 13, so A = sqrt(13); omega = 6/100
x2 = 4*cos(2*pi*1:100*10/100) + 5*sin(2*pi*1:100*10/100)
# A^2 = 4^2 + 5^2 = 41; omega = 10/100
x3 = 6*cos(2*pi*1:100*40/100) + 7*sin(2*pi*1:100*40/100)
# A^2 = 6^2 + 7^2 = 85; omega = 40/100
x=x1+x2+x3
par(mfrow=c(2,2))
plot.ts(x1, ylim=c(-10,10), main=expression(omega==6/100~~~A^2==13))
plot.ts(x2, ylim=c(-10,10), main=expression(omega==10/100~~~A^2==41))
plot.ts(x3, ylim=c(-10,10), main=expression(omega==40/100~~~A^2==85))
plot.ts(x, ylim=c(-16,16), main="sum")
# Method 01: compute the periodogram directly from the FFT
P = abs(2*fft(x)/100)^2
#Fast Discrete Fourier Transform (FFT)
Fr = 0:99/100
par(mfrow=c(1,1))
plot(Fr, P, type="o", xlab="frequency", ylab="periodogram",main='Periodogram with fft')
# spike at 0.06: frequency 6/100 (6 cycles per 100 time points)
# squared amplitude A^2 = 13 --------- x1
# spike at 0.10: frequency 1/10 (10 cycles per 100 time points)
# squared amplitude A^2 = 41 --------- x2
# spike at 0.40: frequency 4/10 (40 cycles per 100 time points)
# squared amplitude A^2 = 85 --------- x3
# There is a mirroring effect at the folding frequency of 1/2; consequently, the periodogram is typically not plotted for frequencies higher than the folding frequency.
# sp <- spectrum(meas$Cases, log="no", spans=c(2,2), plot=FALSE)
# delta <- 1/12
# specx <- mspect$freq/delta
# specy <- 2*mspect$spec
# plot(specx, specy, xlab="Period (years)", ylab="Spectral Density", type="l") # 横坐标为year 而不是frequency
# Method 02: spectrum() wraps spec.pgram() and returns freq, spec, df, etc.
sp=spectrum(x, log="no")
# FIX: use the full component name `freq` — the original `sp$fre` worked only
# through `$` partial matching, which is fragile.
1/sp$freq[which.max(sp$spec)] # 1/frequency = predominant period (time points per cycle)
## [1] 2.5
max(sp$spec) # estimate of spectral density at the predominant frequency
## [1] 1967.092
# 95% chi-squared interval for the spectral density (raw periodogram has 2 df):
# [2*spec/qchisq(.975,2), 2*spec/qchisq(.025,2)]
U = qchisq(.025,2)
L = qchisq(.975,2)
cat("Lower bound for spectral density is : ",2*max(sp$spec)/L ,"\nUpper bound for spectral density is : ",2*max(sp$spec)/U )
## Lower bound for spectral density is : 533.2493
## Upper bound for spectral density is : 77696
# Zoom into the low-frequency part:
# plot(sp$freq[1:100], sp$spec[1:100], type="l")
# spectrum function
# spd=spectrum(ts(detrend, frequency = 1),, log="no")
# The function `spectrum` is a wrapper for calculating the periodogram (i.e., an estimate of the spectral density) from time series. There are a couple issues that need to keep in mind:
#
# * `spectrum` calculates the frequency axis in terms of cycles per sampling interval; it makes more sense to convert to cycles per unit time (so divide by the sampling interval)
# * the spectrum will be far more interpretable if it is smoothed
# + Use the argument `**spans**`, which specifies the parameter(s) for what is known as the modified Daniell kernel for smoothing the spectrum
# _ spans=c(7,7)
# + Modified Daniell **kernel** is essentially just a running average (see code below for a sense of what these parameters do)
# _ kernel("modified.daniell") & kernel("daniell",4)
# No hard-and-fast rule for how to do this (try a couple different values)
# + the higher the number of spans, the more smoothing and the lower the peaks of the spectrum will be
# * the default for `spectrum` is to calculate the spectrum on a log-scale
# Use the argument `log="no"` to change this default
# * The spectrum needs to be multiplied by 2 to make it actually equal to variance
# Method 03
# spec.pgram(x, taper=0, log="no")
# soi.ave = spec.pgram(soi, k, taper=0, log="no")
# k = kernal kernel("modified.daniell") or kernel("daniell",n) n越大越smooth
# log scaling will spread out the low frequencies and squish the high frequencies: default = log_10 所以要关掉
# taper
窗函数是频谱分析中一个重要的部分,窗函数修正了由于信号的非周期性并减小了频谱中由于泄露而带来的测量不准确性。 快速傅里叶变换假定了时间信号是周期无限的。但在分析时,我们往往只截取其中的一部分,因此需要加窗以减小泄露。窗函数可以加在时域,也可以加在频域上,但在时域上加窗更为普遍。截断效应带来了泄漏,窗函数是为了减小这个截断效应,其设计成一组加权系数。例如,一个窗函数可以定义为:
w(t)=g(t) -T/2<t<T/2
w(t)=0 其他
g(t)是窗函数,T是窗函数的时间. 待分析的数据x(t)则表示为:
x(t)=w(t)*x(t)’
x(t)’表示原始信号,x(t)表示待分析信号。 boxcar window function//hamming window function https://blog.csdn.net/u013346007/article/details/54143599
Tapering :that it includes just a few oscillations of the period of interest. ### Convolution Theorem • by the convolution theorem, Fourier Transform of short piece is the Fourier Transform of indefinitely long time series convolved with Fourier Transform of window function. • So Fourier Transform of short piece is exactly Fourier Transform of indefinitely long time series when Fourier Transform of window function is a spike. Boxcar function
• Left – time domain; Right – frequency domain • Top: indefinitely long cosine function, whose Fourier Transform is a spike = boxcar function is any function which is zero over the entire real line except for a single interval where it is equal to a constant • Middle: Boxcar, whose Fourier Transform is a sinc() function • Bottom: Windowed time series, and its Fourier Transform.
hamming window function
to compute the power spectral density with each of these window functions separately and then average the result. :always taper a time series before computing the p.s.d. try a simple Hamming taper first it’s simple use multi-taper analysis when higher resolution is needed e.g. when the time series is very short
Non Parametric Vs. Parametric Estimation • Parametric estimation = estimate a model that is specified by a fixed number of parameters. • Nonparametric estimation = estimate a model that is specified by a number of parameters that can grow as the sample grows. • Thus, the smoothed periodogram estimates we have considered are nonparametric: the estimates of the spectral density can be parameterized by estimated values at each of the Fourier frequencies. • As the sample size grows, the number of distinct frequency values increases. The time domain models we considered (linear processes) are parametric. • For example, an ARMA(p,q) process can be completely specified with p + q + 1 parameters.
# SOI & Recruitment: raw periodograms of both monthly series.
# The frequency axis is in cycles per year (multiples of 1/12 cycles/month).
library(stats)
par(mfrow=c(2,1))
soi.per = spec.pgram(soi, taper=0, log="no")   # raw (untapered) periodogram of SOI
abline(v=1, lty="dotted",col=2)    # yearly cycle: 1 cycle/year = 1/12 cycles/month
abline(v=1/4, lty="dotted",col=2)  # four-year cycle: 1/4 cycle/year = 1/48 cycles/month
abline(h=c(0.2635573167 , 38.40108003), lty="dotted",col=4) # 95% CI for fS(1/12) - blue
abline(h=c( 0.01456529563 , 2.122206623 ), lty="dotted",col='pink') # 95% CI for fS(1/48) - pink
rec.per = spec.pgram(rec, taper=0, log="no")   # raw periodogram of Recruitment
abline(v=1/4, lty="dotted",col=2)
# frequency axis is labeled in multiples of 1/12 cycles per month,
# thus plot frequency = 1 corresponds to 1/12 cycles/month.
# We notice a narrow-band peak at the obvious yearly (12 month) cycle, w = 1/12,
# and a wide band at the lower frequencies centered around the four-year (48 month) cycle, 1/4 * 1/12 = 1/48 — a possible El Nino effect. This wide-band activity suggests that the possible El Nino cycle is irregular, but tends to be around four years on average.
# Periodogram ordinates at the two frequencies of interest (n = 480 months):
cat("periodogram of the SOI series at freq 1/12 = 40/480" ,soi.per$spec[40],'\n') # 0.9722312 at 1/12
## periodogram of the SOI series at freq 1/12 = 40/480 0.9722312
cat("periodogram of the SOI series at freq 1/48 = 10/480" ,soi.per$spec[10]) # 0.05372962 at 1/48
## periodogram of the SOI series at freq 1/48 = 10/480 0.05372962
# conf intervals - each raw-periodogram ordinate has (approximately) 2 df:
U = qchisq(.025,2) # 0.05063
L = qchisq(.975,2) # 7.37775
# [0.26; 38.4] confidence interval for the spectrum fS(1/12)
# [0.01; 2.12] confidence interval for the spectrum fS(1/48)
cat("The 95% confidence interval for the spectrum fS(1/12) is [",2*soi.per$spec[40]/L,',',2*soi.per$spec[40]/U ,"]","\n","The 95% confidence interval for the spectrum fS(1/48) is [",2*soi.per$spec[10]/L,',',2*soi.per$spec[10]/U ,"]")
## The 95% confidence interval for the spectrum fS(1/12) is [ 0.2635573 , 38.40108 ]
## The 95% confidence interval for the spectrum fS(1/48) is [ 0.0145653 , 2.122207 ]
# fS(1/12) : [ 0.2635573167 , 38.40108003 ] — the lower value of 0.26 is higher than any other periodogram ordinate, so it is safe to say that this value is significant.
# fS(1/48) : [ 0.01456529563 , 2.122206623 ] — again extremely wide; we are unable to establish significance of the peak.
# the periodogram as an estimator is susceptible to large uncertainties, and we need to find a way to reduce the variance.
# bandwidth in this case is Bw = 9/480 = 0.01875 cycles per month for the smoothed spectral estimator below
# Smoothed periodograms using a Daniell(4) kernel (a 9-point running
# average, so Bw = 9/480 cycles per month).
par(mfrow=c(2,1))
k = kernel("daniell", 4)
soi.ave = spec.pgram(soi, k, taper=0, log="no")
abline(v=c(.25,1,2,3), lty=2)   # mark the 4-year, 1-, 2- and 3-cycles-per-year frequencies
soi.ave$bandwidth # 0.0649519 = reported bandwidth
## [1] 0.06495191
soi.ave$bandwidth*(1/12)*sqrt(12) # 0.01875 = Bw in cycles per month
## [1] 0.01875
# Same smoothed estimator for the Recruitment series.
# FIX: renamed from `soi.ave` — the original reused the SOI variable name for
# the Recruitment spectrum, which was misleading. (`soi.ave` is reassigned
# from `soi` again further below, so nothing downstream depends on this.)
rec.ave = spec.pgram(rec, k, taper=0, log="no")
abline(v=c(.25,1,2,3), lty=2)
df = rec.ave$df                  # degrees of freedom of the smoothed estimator
U = qchisq(.025, df)
L = qchisq(.975, df)
rec.ave$spec[10]                 # estimate at frequency 10/480 = 1/48
## [1] 658.9607
rec.ave$spec[40]                 # estimate at frequency 40/480 = 1/12
## [1] 219.3919
# 95% chi-squared intervals: [df*spec/qchisq(.975,df), df*spec/qchisq(.025,df)]
df*rec.ave$spec[10]/L
## [1] 370.9816
df*rec.ave$spec[10]/U
## [1] 1481.501
df*rec.ave$spec[40]/L
## [1] 123.5133
df*rec.ave$spec[40]/U
## [1] 493.2453
# Effect of different modified Daniell smoother spans on the spectrum:
# more/larger spans => more smoothing => lower, wider peaks.
par(mfrow=c(1,1))
soi.ave = spec.pgram(soi, kernel("modified.daniell", c(1,2)), taper=0, log="no")  # light smoothing
abline(v=c(.25,1), lty=2)
soi.ave = spec.pgram(soi, kernel("modified.daniell", c(5,3)), taper=0, log="no")  # heavier smoothing
abline(v=c(.25,1), lty=2)
soi.ave = spec.pgram(rec, kernel("modified.daniell", c(5,3)), taper=0, log="no")  # same smoother on Recruitment
abline(v=c(.25,1), lty=2)
# Compare no taper vs. a 50% cosine-bell taper on the smoothed SOI spectrum.
s0 = spectrum(soi, spans=c(7,7), taper=0, plot=FALSE)
s50 = spectrum(soi, spans=c(7,7), taper=.5, plot=FALSE)
plot(s0$freq, s0$spec, log="y", type="l", lty=2, ylab="spectrum",
xlab="frequency")
# dashed line: taper=0 (no tapering)
lines(s50$freq, s50$spec,col=4)
# solid (blue) line: taper=0.5
# Notice that the tapered spectrum does a better job of separating the yearly cycle (w = 1) and the El Nino cycle (w = 1/4).
# Squared coherence between SOI and Recruitment.
sr=spec.pgram(cbind(soi,rec),kernel("daniell",9),taper=0,plot=FALSE)
sr$df # df = 35.8625; kernel("daniell",9) averages L = 2*9+1 = 19 ordinates
## [1] 35.8625
f = qf(.999, 2, sr$df-2) # = 8.529792; F quantile for the coherence significance test
C = f/(18+f) # = 0.318878; threshold C = f/(L-1+f) with L = 19 — presumably per the textbook test, TODO confirm
plot(sr, plot.type = "coh", ci.lty = 2)
abline(h = C)   # coherence above this line is significant at the 0.001 level
# coherence is persistent at the seasonal harmonic frequencies.
# the seasonal frequency and the El Nino frequencies ranging between about 3 and 7 year periods are strongly coherent
ccf(soi,rec)
# There is clearly significant cross-correlation for short lags. Now we can investigate the wavelet coherence for the two series.
Are the two waves consistent (i.e., coherent) with each other?
Coherence is a time-series measure similar to correlation. It’s a measure of recurrent phenomena (i.e., waves). Two waves are coherent if they have a constant relative phase.
Most approaches to finding periodic behavior (including coherence) assume that the underlying series are stationary, meaning that the mean of the process remains constant. Clearly, this is not such a good assumption when the goal of an analysis is to study environmental change. Wavelets allow us to study localized periodic behavior. In particular, we look for regions of high-power in the frequency-time plot.
We can demonstrate with a toy example from the biwavelet package. We know that the Multivariate ENSO Index (MEI) and the North Pacific Gyre Oscillation (NPGO) experience coherent fluctuations with a frequency of approximately 5-12 years. A small data set is included with biwavelet that includes monthly values of these two series from 1950-2009.
AR(1)-ARCH(1) Model
# Simulate an ARCH(1) process with alpha0 = 5, alpha1 = 0.5
# (TSA::garch.sim takes alpha = c(alpha0, alpha1)).
set.seed(101)
library(TSA)
sim.data = ts(garch.sim(alpha=c(5,0.5),n=300))
plot(sim.data,main='simulated ARCH(1)')
acf(sim.data,25)
# No correlations are significant, so the series itself looks like white noise.
pacf(sim.data^2,25)
# The squared series has a single PACF spike at lag 1, suggesting an AR(1)
# model for the squared series ~> ARCH(1).
# simulated by loop
# y = rnorm(1000)
# w=y
# Y = c()
# b=(length(y))
# for (t in 1:b){
# Y[t] = w[t] * sqrt((a0 + a1*(y[t-1])^2))
# }
# simulated ARCH(1) series, looks like white noise
# ARCH check on U.S. GNP: fit an AR(1) to the growth rate and inspect the
# (P)ACF of the squared residuals for conditional heteroscedasticity.
gnp <- astsa::gnp
library(TSA)
gnpgr <- diff(log(gnp))                  # growth rate - y
ar1 <- arima(gnpgr, order = c(1, 0, 0))  # fit an AR(1)
resi <- residuals(ar1)                   # same values as ar1$residuals
acf2(resi^2, 24)                         # (P)ACF of the squared residuals
## ACF PACF
## [1,] 0.12 0.12
## [2,] 0.13 0.12
## [3,] 0.03 0.00
## [4,] 0.13 0.12
## [5,] 0.01 -0.02
## [6,] 0.05 0.02
## [7,] -0.03 -0.04
## [8,] 0.06 0.05
## [9,] 0.08 0.08
## [10,] -0.08 -0.12
## [11,] 0.09 0.11
## [12,] 0.10 0.09
## [13,] 0.01 -0.05
## [14,] 0.04 0.05
## [15,] 0.15 0.13
## [16,] -0.02 -0.09
## [17,] 0.04 0.01
## [18,] -0.05 -0.05
## [19,] 0.01 0.00
## [20,] 0.05 0.04
## [21,] 0.06 0.05
## [22,] -0.06 -0.05
## [23,] 0.02 -0.03
## [24,] 0.03 0.03
# Simulate a GARCH(1,1) process with alpha0 = 5, alpha1 = 0.05, beta1 = 0.5.
set.seed(34662)
library(TSA)
sim.data2 = ts(garch.sim(alpha=c(5,0.05),beta=0.5,n=300))
plot(sim.data2 ,main='simulated GARCH(1,1)')
acf(sim.data2,25)
# BUG FIX: was pacf(sim.data,25), which re-examined the earlier ARCH(1)
# simulation instead of this GARCH(1,1) series.
pacf(sim.data2,25)
# The ACF of the series itself looks like white noise.
acf(sim.data2^2,25)
pacf(sim.data2^2,25)
# The squared series shows significant low-lag correlation, consistent with
# GARCH-type conditional heteroscedasticity.
# NYSE daily returns from astsa; fGarch provides garchFit() used below.
nyse = astsa::nyse
library(fGarch)
## Loading required package: timeDate
##
## Attaching package: 'timeDate'
## The following objects are masked from 'package:TSA':
##
## kurtosis, skewness
## Loading required package: timeSeries
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
##
## time<-
## Loading required package: fBasics
##
## Attaching package: 'fBasics'
## The following object is masked _by_ '.GlobalEnv':
##
## nyse
## The following object is masked from 'package:astsa':
##
## nyse
plot.ts(nyse)   # returns series; volatility clustering is visible
acf2(nyse)      # sample ACF/PACF of the raw returns
## ACF PACF
## [1,] 0.10 0.10
## [2,] -0.05 -0.07
## [3,] -0.05 -0.03
## [4,] -0.03 -0.03
## [5,] 0.07 0.07
## [6,] 0.01 -0.01
## [7,] 0.00 0.00
## [8,] -0.02 -0.01
## [9,] -0.02 -0.01
## [10,] -0.01 -0.02
## [11,] 0.01 0.01
## [12,] 0.01 0.01
## [13,] -0.02 -0.03
## [14,] 0.02 0.03
## [15,] -0.02 -0.02
## [16,] 0.04 0.04
## [17,] -0.01 -0.03
## [18,] -0.03 -0.01
## [19,] -0.03 -0.03
## [20,] -0.01 0.00
## [21,] -0.01 -0.02
## [22,] -0.01 -0.01
## [23,] 0.00 0.00
## [24,] 0.02 0.02
## [25,] -0.02 -0.02
## [26,] -0.03 -0.03
## [27,] 0.01 0.01
## [28,] 0.04 0.04
## [29,] 0.06 0.05
## [30,] 0.01 0.00
## [31,] -0.02 -0.01
## [32,] 0.07 0.08
## [33,] 0.05 0.04
## [34,] 0.01 -0.01
## [35,] -0.02 -0.01
## [36,] -0.02 0.00
## [37,] 0.02 0.02
## [38,] -0.04 -0.05
## [39,] -0.04 -0.04
## [40,] -0.03 -0.03
## [41,] -0.01 -0.01
## [42,] 0.02 0.01
## [43,] 0.02 0.01
## [44,] 0.01 0.00
## [45,] 0.01 0.01
## [46,] 0.00 0.00
## [47,] 0.00 0.01
## [48,] -0.01 -0.02
## [49,] -0.04 -0.04
## [50,] -0.01 0.01
## [51,] 0.03 0.03
## [52,] -0.04 -0.05
## [53,] 0.02 0.03
## [54,] -0.05 -0.04
## [55,] -0.07 -0.05
# This suggests an ARMA(1,1), so maybe a GARCH(1,1) model.
acf2(nyse^2)   # ACF/PACF of squared returns: persistent correlation points to GARCH-type dependence
## ACF PACF
## [1,] 0.09 0.09
## [2,] 0.23 0.22
## [3,] 0.12 0.09
## [4,] 0.02 -0.04
## [5,] 0.18 0.15
## [6,] 0.04 0.02
## [7,] 0.01 -0.06
## [8,] 0.06 0.02
## [9,] 0.05 0.07
## [10,] 0.01 -0.04
## [11,] 0.02 -0.02
## [12,] 0.01 0.02
## [13,] 0.02 0.01
## [14,] 0.01 -0.02
## [15,] 0.02 0.02
## [16,] 0.01 0.01
## [17,] 0.00 -0.01
## [18,] 0.02 0.01
## [19,] 0.01 0.02
## [20,] 0.00 -0.01
## [21,] 0.01 0.00
## [22,] 0.00 0.00
## [23,] 0.01 0.01
## [24,] 0.02 0.01
## [25,] 0.00 -0.01
## [26,] 0.01 0.00
## [27,] 0.02 0.02
## [28,] 0.00 -0.01
## [29,] 0.05 0.04
## [30,] 0.01 0.01
## [31,] 0.00 -0.02
## [32,] 0.03 0.02
## [33,] 0.01 0.02
## [34,] 0.01 -0.01
## [35,] 0.02 0.01
## [36,] 0.01 0.01
## [37,] 0.01 -0.01
## [38,] 0.01 -0.01
## [39,] 0.02 0.02
## [40,] 0.00 -0.01
## [41,] 0.02 0.00
## [42,] 0.00 0.00
## [43,] 0.02 0.02
## [44,] 0.00 -0.02
## [45,] 0.00 -0.01
## [46,] 0.00 0.01
## [47,] 0.01 0.01
## [48,] 0.02 0.01
## [49,] 0.00 0.00
## [50,] 0.01 0.00
## [51,] 0.02 0.01
## [52,] 0.03 0.02
## [53,] 0.01 -0.01
## [54,] 0.02 0.01
## [55,] 0.00 -0.01
# Fit a constant-mean GARCH(2,2) model to the NYSE returns and keep the
# fitted object (used later for the conditional-sd bands); the verbose
# optimizer trace follows.
nyse.g=garchFit(~garch(2,2),nyse)
##
## Series Initialization:
## ARMA Model: arma
## Formula Mean: ~ arma(0, 0)
## GARCH Model: garch
## Formula Variance: ~ garch(2, 2)
## ARMA Order: 0 0
## Max ARMA Order: 0
## GARCH Order: 2 2
## Max GARCH Order: 2
## Maximum Order: 2
## Conditional Dist: norm
## h.start: 3
## llh.start: 1
## Length of Series: 2000
## Recursion Init: mci
## Series Scale: 0.009862128
##
## Parameter Initialization:
## Initial Parameters: $params
## Limits of Transformations: $U, $V
## Which Parameters are Fixed? $includes
## Parameter Matrix:
## U V params includes
## mu -0.47751157 0.4775116 0.04775116 TRUE
## omega 0.00000100 100.0000000 0.10000000 TRUE
## alpha1 0.00000001 1.0000000 0.05000000 TRUE
## alpha2 0.00000001 1.0000000 0.05000000 TRUE
## gamma1 -0.99999999 1.0000000 0.10000000 FALSE
## gamma2 -0.99999999 1.0000000 0.10000000 FALSE
## beta1 0.00000001 1.0000000 0.40000000 TRUE
## beta2 0.00000001 1.0000000 0.40000000 TRUE
## delta 0.00000000 2.0000000 2.00000000 FALSE
## skew 0.10000000 10.0000000 1.00000000 FALSE
## shape 1.00000000 10.0000000 4.00000000 FALSE
## Index List of Parameters to be Optimized:
## mu omega alpha1 alpha2 beta1 beta2
## 1 2 3 4 7 8
## Persistence: 0.9
##
##
## --- START OF TRACE ---
## Selected Algorithm: nlminb
##
## R coded nlminb Solver:
##
## 0: 2545.5407: 0.0477512 0.100000 0.0500000 0.0500000 0.400000 0.400000
## 1: 2532.0400: 0.0477525 0.0903410 0.0503365 0.0464330 0.392922 0.392857
## 2: 2519.4286: 0.0477619 0.0758030 0.0812173 0.0511406 0.387829 0.387617
## 3: 2516.6880: 0.0478004 0.0719527 0.108783 0.0317149 0.380727 0.381498
## 4: 2514.4575: 0.0478468 0.0946886 0.128150 0.0145461 0.374834 0.377241
## 5: 2513.0100: 0.0478895 0.0942306 0.131508 0.00411633 0.369979 0.373422
## 6: 2512.9475: 0.0480706 0.0919428 0.133022 1.00000e-08 0.375954 0.381750
## 7: 2512.5162: 0.0481726 0.0868194 0.131158 1.00000e-08 0.376452 0.382950
## 8: 2512.3916: 0.0485484 0.0820068 0.130526 1.00000e-08 0.382237 0.387942
## 9: 2512.2060: 0.0496484 0.0831703 0.133068 1.00000e-08 0.381198 0.381935
## 10: 2512.1501: 0.0506613 0.0892665 0.138917 1.00000e-08 0.377390 0.375630
## 11: 2511.9816: 0.0517715 0.0850583 0.137198 1.00000e-08 0.378903 0.379098
## 12: 2511.9160: 0.0539673 0.0784120 0.133802 1.00000e-08 0.384213 0.389293
## 13: 2511.6972: 0.0562427 0.0784415 0.134483 1.00000e-08 0.378674 0.390041
## 14: 2511.5360: 0.0585190 0.0815826 0.137180 1.00000e-08 0.375407 0.388544
## 15: 2511.3778: 0.0632333 0.0808377 0.136229 1.00000e-08 0.405984 0.358803
## 16: 2511.2290: 0.0678814 0.0816100 0.137359 1.00000e-08 0.372546 0.389997
## 17: 2511.2205: 0.0701079 0.0802418 0.138295 1.00000e-08 0.379152 0.387685
## 18: 2511.1480: 0.0711607 0.0821857 0.142179 1.00000e-08 0.371971 0.387529
## 19: 2511.1368: 0.0722607 0.0809768 0.140672 1.00000e-08 0.376254 0.385605
## 20: 2511.1304: 0.0733844 0.0815530 0.141089 1.00000e-08 0.376647 0.384152
## 21: 2511.1288: 0.0744337 0.0816518 0.141413 1.00000e-08 0.377243 0.383178
## 22: 2511.1287: 0.0744020 0.0817745 0.141337 1.00000e-08 0.377275 0.383016
## 23: 2511.1287: 0.0743936 0.0817350 0.141360 1.00000e-08 0.377250 0.383085
## 24: 2511.1287: 0.0743941 0.0817349 0.141360 1.00000e-08 0.377250 0.383085
##
## Final Estimate of the Negative LLH:
## LLH: -6726.978 norm LLH: -3.363489
## mu omega alpha1 alpha2 beta1
## 7.336844e-04 7.949665e-06 1.413597e-01 1.000000e-08 3.772499e-01
## beta2
## 3.830851e-01
##
## R-optimhess Difference Approximated Hessian Matrix:
## mu omega alpha1 alpha2
## mu -3.132819e+07 -2.634055e+08 6.339972e+03 1.933046e+03
## omega -2.634055e+08 -4.787388e+12 -1.860717e+08 -2.026262e+08
## alpha1 6.339972e+03 -1.860717e+08 -1.377313e+04 -1.330570e+04
## alpha2 1.933046e+03 -2.026262e+08 -1.330570e+04 -1.539578e+04
## beta1 -1.440569e+04 -2.957359e+08 -1.438327e+04 -1.590971e+04
## beta2 -1.627753e+04 -2.988452e+08 -1.439136e+04 -1.567340e+04
## beta1 beta2
## mu -14405.69 -16277.53
## omega -295735878.61 -298845244.23
## alpha1 -14383.27 -14391.36
## alpha2 -15909.71 -15673.40
## beta1 -20334.26 -20490.65
## beta2 -20490.65 -20755.21
## attr(,"time")
## Time difference of 0.05946302 secs
##
## --- END OF TRACE ---
##
##
## Time to Estimate Parameters:
## Time difference of 0.207365 secs
# Fit and summarize a GARCH(1,1) model for comparison with the GARCH(2,2).
summary(garchFit(~garch(1,1),nyse))
##
## Series Initialization:
## ARMA Model: arma
## Formula Mean: ~ arma(0, 0)
## GARCH Model: garch
## Formula Variance: ~ garch(1, 1)
## ARMA Order: 0 0
## Max ARMA Order: 0
## GARCH Order: 1 1
## Max GARCH Order: 1
## Maximum Order: 1
## Conditional Dist: norm
## h.start: 2
## llh.start: 1
## Length of Series: 2000
## Recursion Init: mci
## Series Scale: 0.009862128
##
## Parameter Initialization:
## Initial Parameters: $params
## Limits of Transformations: $U, $V
## Which Parameters are Fixed? $includes
## Parameter Matrix:
## U V params includes
## mu -0.47751157 0.4775116 0.04775116 TRUE
## omega 0.00000100 100.0000000 0.10000000 TRUE
## alpha1 0.00000001 1.0000000 0.10000000 TRUE
## gamma1 -0.99999999 1.0000000 0.10000000 FALSE
## beta1 0.00000001 1.0000000 0.80000000 TRUE
## delta 0.00000000 2.0000000 2.00000000 FALSE
## skew 0.10000000 10.0000000 1.00000000 FALSE
## shape 1.00000000 10.0000000 4.00000000 FALSE
## Index List of Parameters to be Optimized:
## mu omega alpha1 beta1
## 1 2 3 5
## Persistence: 0.9
##
##
## --- START OF TRACE ---
## Selected Algorithm: nlminb
##
## R coded nlminb Solver:
##
## 0: 2530.4637: 0.0477512 0.100000 0.100000 0.800000
## 1: 2519.8035: 0.0477528 0.0890814 0.0971176 0.792299
## 2: 2518.2039: 0.0477610 0.0778289 0.104356 0.789508
## 3: 2517.1996: 0.0477746 0.0798531 0.116900 0.793679
## 4: 2516.7331: 0.0478516 0.0691264 0.124616 0.792366
## 5: 2516.2421: 0.0481458 0.0683125 0.118305 0.802390
## 6: 2516.2194: 0.0482907 0.0677374 0.114458 0.803568
## 7: 2516.1678: 0.0485045 0.0683114 0.114028 0.805840
## 8: 2516.1316: 0.0487284 0.0669777 0.113452 0.807117
## 9: 2515.9510: 0.0525311 0.0734967 0.118200 0.793747
## 10: 2515.4092: 0.0634778 0.0698340 0.119846 0.800190
## 11: 2515.3030: 0.0664531 0.0680590 0.112251 0.803958
## 12: 2515.2868: 0.0674939 0.0694508 0.111240 0.806768
## 13: 2515.2002: 0.0670154 0.0686854 0.114811 0.803987
## 14: 2515.1933: 0.0675396 0.0680346 0.114070 0.804192
## 15: 2515.1731: 0.0680650 0.0682219 0.113933 0.804773
## 16: 2515.1629: 0.0685869 0.0674545 0.114831 0.805592
## 17: 2515.1136: 0.0724617 0.0662792 0.112970 0.808526
## 18: 2515.1033: 0.0750941 0.0676546 0.114639 0.804962
## 19: 2515.1021: 0.0747415 0.0672663 0.114077 0.806071
## 20: 2515.1021: 0.0747241 0.0672569 0.114080 0.806079
## 21: 2515.1021: 0.0747252 0.0672580 0.114080 0.806077
##
## Final Estimate of the Negative LLH:
## LLH: -6723.005 norm LLH: -3.361502
## mu omega alpha1 beta1
## 7.369498e-04 6.541615e-06 1.140803e-01 8.060773e-01
##
## R-optimhess Difference Approximated Hessian Matrix:
## mu omega alpha1 beta1
## mu -31433050.11 -3.035042e+08 7493.64 -17309.08
## omega -303504217.89 -7.277616e+12 -276913204.28 -450073505.20
## alpha1 7493.64 -2.769132e+08 -20890.60 -21533.02
## beta1 -17309.08 -4.500735e+08 -21533.02 -30843.18
## attr(,"time")
## Time difference of 0.03796792 secs
##
## --- END OF TRACE ---
##
##
## Time to Estimate Parameters:
## Time difference of 0.09894109 secs
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(1, 1), data = nyse)
##
## Mean and Variance Equation:
## data ~ garch(1, 1)
## <environment: 0x7fed8e3930c8>
## [data = nyse]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu omega alpha1 beta1
## 7.3695e-04 6.5416e-06 1.1408e-01 8.0608e-01
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 7.369e-04 1.786e-04 4.126 3.69e-05 ***
## omega 6.542e-06 1.455e-06 4.495 6.94e-06 ***
## alpha1 1.141e-01 1.604e-02 7.114 1.13e-12 ***
## beta1 8.061e-01 2.973e-02 27.112 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 6723.005 normalized: 3.361502
##
## Description:
## Sat May 23 15:59:47 2020 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 3628.415 0
## Shapiro-Wilk Test R W 0.9515562 0
## Ljung-Box Test R Q(10) 29.69242 0.0009616813
## Ljung-Box Test R Q(15) 30.50938 0.01021164
## Ljung-Box Test R Q(20) 32.81143 0.03538324
## Ljung-Box Test R^2 Q(10) 3.510505 0.9667405
## Ljung-Box Test R^2 Q(15) 4.408852 0.9960585
## Ljung-Box Test R^2 Q(20) 6.68935 0.9975864
## LM Arch Test R TR^2 3.967784 0.9840107
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -6.719005 -6.707803 -6.719013 -6.714891
# NOTE(review): this refits the same GARCH(2,2) model already stored in
# `nyse.g` above; `summary(nyse.g)` would give the summary without refitting.
summary(garchFit(~garch(2,2),nyse))
##
## Series Initialization:
## ARMA Model: arma
## Formula Mean: ~ arma(0, 0)
## GARCH Model: garch
## Formula Variance: ~ garch(2, 2)
## ARMA Order: 0 0
## Max ARMA Order: 0
## GARCH Order: 2 2
## Max GARCH Order: 2
## Maximum Order: 2
## Conditional Dist: norm
## h.start: 3
## llh.start: 1
## Length of Series: 2000
## Recursion Init: mci
## Series Scale: 0.009862128
##
## Parameter Initialization:
## Initial Parameters: $params
## Limits of Transformations: $U, $V
## Which Parameters are Fixed? $includes
## Parameter Matrix:
## U V params includes
## mu -0.47751157 0.4775116 0.04775116 TRUE
## omega 0.00000100 100.0000000 0.10000000 TRUE
## alpha1 0.00000001 1.0000000 0.05000000 TRUE
## alpha2 0.00000001 1.0000000 0.05000000 TRUE
## gamma1 -0.99999999 1.0000000 0.10000000 FALSE
## gamma2 -0.99999999 1.0000000 0.10000000 FALSE
## beta1 0.00000001 1.0000000 0.40000000 TRUE
## beta2 0.00000001 1.0000000 0.40000000 TRUE
## delta 0.00000000 2.0000000 2.00000000 FALSE
## skew 0.10000000 10.0000000 1.00000000 FALSE
## shape 1.00000000 10.0000000 4.00000000 FALSE
## Index List of Parameters to be Optimized:
## mu omega alpha1 alpha2 beta1 beta2
## 1 2 3 4 7 8
## Persistence: 0.9
##
##
## --- START OF TRACE ---
## Selected Algorithm: nlminb
##
## R coded nlminb Solver:
##
## 0: 2545.5407: 0.0477512 0.100000 0.0500000 0.0500000 0.400000 0.400000
## 1: 2532.0400: 0.0477525 0.0903410 0.0503365 0.0464330 0.392922 0.392857
## 2: 2519.4286: 0.0477619 0.0758030 0.0812173 0.0511406 0.387829 0.387617
## 3: 2516.6880: 0.0478004 0.0719527 0.108783 0.0317149 0.380727 0.381498
## 4: 2514.4575: 0.0478468 0.0946886 0.128150 0.0145461 0.374834 0.377241
## 5: 2513.0100: 0.0478895 0.0942306 0.131508 0.00411633 0.369979 0.373422
## 6: 2512.9475: 0.0480706 0.0919428 0.133022 1.00000e-08 0.375954 0.381750
## 7: 2512.5162: 0.0481726 0.0868194 0.131158 1.00000e-08 0.376452 0.382950
## 8: 2512.3916: 0.0485484 0.0820068 0.130526 1.00000e-08 0.382237 0.387942
## 9: 2512.2060: 0.0496484 0.0831703 0.133068 1.00000e-08 0.381198 0.381935
## 10: 2512.1501: 0.0506613 0.0892665 0.138917 1.00000e-08 0.377390 0.375630
## 11: 2511.9816: 0.0517715 0.0850583 0.137198 1.00000e-08 0.378903 0.379098
## 12: 2511.9160: 0.0539673 0.0784120 0.133802 1.00000e-08 0.384213 0.389293
## 13: 2511.6972: 0.0562427 0.0784415 0.134483 1.00000e-08 0.378674 0.390041
## 14: 2511.5360: 0.0585190 0.0815826 0.137180 1.00000e-08 0.375407 0.388544
## 15: 2511.3778: 0.0632333 0.0808377 0.136229 1.00000e-08 0.405984 0.358803
## 16: 2511.2290: 0.0678814 0.0816100 0.137359 1.00000e-08 0.372546 0.389997
## 17: 2511.2205: 0.0701079 0.0802418 0.138295 1.00000e-08 0.379152 0.387685
## 18: 2511.1480: 0.0711607 0.0821857 0.142179 1.00000e-08 0.371971 0.387529
## 19: 2511.1368: 0.0722607 0.0809768 0.140672 1.00000e-08 0.376254 0.385605
## 20: 2511.1304: 0.0733844 0.0815530 0.141089 1.00000e-08 0.376647 0.384152
## 21: 2511.1288: 0.0744337 0.0816518 0.141413 1.00000e-08 0.377243 0.383178
## 22: 2511.1287: 0.0744020 0.0817745 0.141337 1.00000e-08 0.377275 0.383016
## 23: 2511.1287: 0.0743936 0.0817350 0.141360 1.00000e-08 0.377250 0.383085
## 24: 2511.1287: 0.0743941 0.0817349 0.141360 1.00000e-08 0.377250 0.383085
##
## Final Estimate of the Negative LLH:
## LLH: -6726.978 norm LLH: -3.363489
## mu omega alpha1 alpha2 beta1
## 7.336844e-04 7.949665e-06 1.413597e-01 1.000000e-08 3.772499e-01
## beta2
## 3.830851e-01
##
## R-optimhess Difference Approximated Hessian Matrix:
## mu omega alpha1 alpha2
## mu -3.132819e+07 -2.634055e+08 6.339972e+03 1.933046e+03
## omega -2.634055e+08 -4.787388e+12 -1.860717e+08 -2.026262e+08
## alpha1 6.339972e+03 -1.860717e+08 -1.377313e+04 -1.330570e+04
## alpha2 1.933046e+03 -2.026262e+08 -1.330570e+04 -1.539578e+04
## beta1 -1.440569e+04 -2.957359e+08 -1.438327e+04 -1.590971e+04
## beta2 -1.627753e+04 -2.988452e+08 -1.439136e+04 -1.567340e+04
## beta1 beta2
## mu -14405.69 -16277.53
## omega -295735878.61 -298845244.23
## alpha1 -14383.27 -14391.36
## alpha2 -15909.71 -15673.40
## beta1 -20334.26 -20490.65
## beta2 -20490.65 -20755.21
## attr(,"time")
## Time difference of 0.161231 secs
##
## --- END OF TRACE ---
##
##
## Time to Estimate Parameters:
## Time difference of 0.297029 secs
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(2, 2), data = nyse)
##
## Mean and Variance Equation:
## data ~ garch(2, 2)
## <environment: 0x7fed8e913910>
## [data = nyse]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu omega alpha1 alpha2 beta1 beta2
## 7.3368e-04 7.9497e-06 1.4136e-01 1.0000e-08 3.7725e-01 3.8309e-01
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 7.337e-04 1.791e-04 4.098 4.18e-05 ***
## omega 7.950e-06 3.367e-06 2.361 0.0182 *
## alpha1 1.414e-01 2.408e-02 5.872 4.32e-09 ***
## alpha2 1.000e-08 6.478e-02 0.000 1.0000
## beta1 3.772e-01 2.871e-01 1.314 0.1889
## beta2 3.831e-01 2.067e-01 1.853 0.0639 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 6726.978 normalized: 3.363489
##
## Description:
## Sat May 23 15:59:48 2020 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 3283.62 0
## Shapiro-Wilk Test R W 0.9532771 0
## Ljung-Box Test R Q(10) 28.66327 0.001412368
## Ljung-Box Test R Q(15) 29.53388 0.01371783
## Ljung-Box Test R Q(20) 31.75336 0.04599833
## Ljung-Box Test R^2 Q(10) 2.732222 0.9870426
## Ljung-Box Test R^2 Q(15) 3.796005 0.9983323
## Ljung-Box Test R^2 Q(20) 6.058174 0.9988166
## LM Arch Test R TR^2 3.330788 0.9927238
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -6.720978 -6.704175 -6.720996 -6.714808
# m1.sim=garch(x=garch01.sim, order=c(0,1)) # order=c(0,1) specifies an ARCH(1) model
# Fit an ARMA(1,1) for the conditional mean and examine its residuals.
arma = arima(nyse,order=c(1,0,1))
arma.re = arma$residuals   # mean-model residuals (also used further below)
acf2(arma.re)              # residuals themselves look essentially uncorrelated
## ACF PACF
## [1,] 0.00 0.00
## [2,] -0.01 -0.01
## [3,] -0.06 -0.06
## [4,] -0.03 -0.03
## [5,] 0.07 0.07
## [6,] 0.01 0.00
## [7,] 0.00 0.00
## [8,] -0.02 -0.01
## [9,] -0.01 -0.01
## [10,] -0.01 -0.02
## [11,] 0.01 0.01
## [12,] 0.02 0.01
## [13,] -0.03 -0.03
## [14,] 0.03 0.03
## [15,] -0.03 -0.02
## [16,] 0.04 0.04
## [17,] -0.02 -0.02
## [18,] -0.02 -0.02
## [19,] -0.03 -0.03
## [20,] -0.01 -0.01
## [21,] -0.01 -0.01
## [22,] -0.01 -0.01
## [23,] 0.00 0.00
## [24,] 0.02 0.02
## [25,] -0.02 -0.02
## [26,] -0.03 -0.03
## [27,] 0.01 0.01
## [28,] 0.04 0.03
## [29,] 0.05 0.05
## [30,] 0.01 0.00
## [31,] -0.03 -0.02
## [32,] 0.07 0.07
## [33,] 0.05 0.05
## [34,] 0.00 0.00
## [35,] -0.02 -0.01
## [36,] -0.02 -0.01
## [37,] 0.02 0.02
## [38,] -0.04 -0.05
## [39,] -0.04 -0.04
## [40,] -0.03 -0.03
## [41,] -0.01 -0.01
## [42,] 0.02 0.01
## [43,] 0.02 0.01
## [44,] 0.01 0.00
## [45,] 0.01 0.02
## [46,] 0.00 0.00
## [47,] 0.00 0.01
## [48,] -0.01 -0.01
## [49,] -0.03 -0.04
## [50,] -0.01 -0.01
## [51,] 0.04 0.04
## [52,] -0.05 -0.05
## [53,] 0.02 0.02
## [54,] -0.05 -0.03
## [55,] -0.06 -0.05
acf2(arma.re^2)   # squared residuals remain correlated: ARCH effects persist after the ARMA fit
## ACF PACF
## [1,] 0.12 0.12
## [2,] 0.19 0.18
## [3,] 0.14 0.10
## [4,] 0.03 -0.03
## [5,] 0.21 0.18
## [6,] 0.06 0.01
## [7,] 0.02 -0.06
## [8,] 0.07 0.03
## [9,] 0.05 0.05
## [10,] 0.01 -0.05
## [11,] 0.02 -0.01
## [12,] 0.01 0.02
## [13,] 0.01 0.00
## [14,] 0.01 -0.01
## [15,] 0.02 0.03
## [16,] 0.01 0.01
## [17,] 0.00 -0.01
## [18,] 0.01 0.01
## [19,] 0.02 0.02
## [20,] 0.00 -0.01
## [21,] 0.01 0.00
## [22,] 0.00 0.00
## [23,] 0.02 0.01
## [24,] 0.02 0.01
## [25,] 0.00 -0.01
## [26,] 0.01 0.00
## [27,] 0.02 0.02
## [28,] 0.00 -0.01
## [29,] 0.05 0.04
## [30,] 0.01 0.00
## [31,] 0.00 -0.02
## [32,] 0.03 0.02
## [33,] 0.01 0.01
## [34,] 0.01 -0.01
## [35,] 0.02 0.01
## [36,] 0.01 0.01
## [37,] 0.01 0.00
## [38,] 0.01 -0.01
## [39,] 0.02 0.02
## [40,] 0.00 -0.01
## [41,] 0.02 0.00
## [42,] 0.01 0.00
## [43,] 0.02 0.02
## [44,] 0.00 -0.02
## [45,] 0.00 -0.01
## [46,] 0.00 0.00
## [47,] 0.01 0.01
## [48,] 0.02 0.01
## [49,] 0.00 0.00
## [50,] 0.01 0.00
## [51,] 0.02 0.02
## [52,] 0.03 0.03
## [53,] 0.01 -0.01
## [54,] 0.01 0.00
## [55,] 0.00 0.00
# prediction: overlay +/- 2 conditional-standard-deviation bands from the
# GARCH(2,2) fit; nyse.g@sigma.t holds the fitted conditional sd series.
u = nyse.g@sigma.t
plot(window(nyse, start=900, end=1000), ylim=c(-.22,.2), ylab="NYSE Returns")
lines(window(nyse-2*u, start=900, end=1000), lty=2, col=4)
lines(window(nyse+2*u, start=900, end=1000), lty=2, col=4)
* Residual tests: + The null hypothesis of the Box–Ljung test is that the residual autocorrelations are zero. When its p-value is greater than 0.05, we cannot reject that null, so the model adequately represents the residuals. + The Jarque–Bera statistic tests the residuals of the fit for normality based on the observed skewness and kurtosis, and it appears that the residuals have some nonnormal skewness and kurtosis. + The Shapiro–Wilk statistic tests the residuals of the fit for normality based on the empirical order statistics.
Extensions to GARCH * Various extensions to the original model have been proposed to overcome some of the shortcomings we have just mentioned. * S-PLUS GARCH model: will fit some non- normal, albeit symmetric, distributions. * EGARCH (exponential GARCH) model: For asymmetric return dynamics, which is a complex model that has different components for positive returns and for negative returns. * Integrated GARCH (IGARCH) model: In the case of persistence in volatility.